
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time
import requests
from hashlib import md5
import os
# Toutiao home page; the scraper loads this address before running a search.
URL = 'https://www.toutiao.com'
 
class TouTiaoImg():
    """Scrape images from Toutiao search results with a Chrome WebDriver.

    Workflow: ``first_step`` searches the home page for a keyword,
    ``second_step`` walks the result list and opens every entry,
    ``third_step`` collects the image URLs of one detail page and
    ``save_img`` downloads each of them to disk.
    """

    # Root folder for downloads; one sub-folder per entry title is created below it.
    save_root = 'C:/toutiaoImage'
    # Number of times first_step attempts the whole search before giving up.
    max_retries = 3
    # Seconds to wait for a single image download before abandoning it.
    download_timeout = 10
    # Characters that may not appear in a Windows file or folder name.
    _FORBIDDEN_NAME_CHARS = '\\/:*?"<>|'

    def __init__(self, save_root=None, max_retries=None):
        """Start Chrome and prepare a 10-second explicit wait.

        Args:
            save_root: Optional download root overriding the class default.
            max_retries: Optional retry limit overriding the class default.
        """
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 10)
        self.url = URL
        if save_root is not None:
            self.save_root = save_root
        if max_retries is not None:
            self.max_retries = max_retries

    # Open the home page and search for the keyword.
    def first_step(self, keyword):
        """Search for ``keyword`` and scrape every result entry.

        A ``TimeoutException`` restarts the search, at most ``max_retries``
        attempts in total. The browser is shut down exactly once when the
        method returns or raises, so the instance cannot be reused afterwards.

        Args:
            keyword: Text typed into the site's search box.
        """
        try:
            for attempt in range(1, self.max_retries + 1):
                try:
                    self._open_search_results(keyword)
                    self.second_step()
                    return
                except TimeoutException:
                    if attempt >= self.max_retries:
                        print("timeout, giving up")
                    else:
                        print("timeout, retrying ...")
                        # Drop tabs left over from the failed attempt.
                        self._close_extra_windows()
        finally:
            # quit() ends the whole driver session; close() would only close
            # the current tab and leave the driver process running.
            self.browser.quit()

    def _open_search_results(self, keyword):
        """Load the home page, submit the search and switch to the result tab."""
        self.browser.get(self.url)
        # Search input box.
        inputs = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#root > div > div.search-container > div > div.search > input[type=text]')))
        # Search button.
        button = self.wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#root > div > div.search-container > div > div.search > button')))
        inputs.send_keys(keyword)
        known_handles = self.browser.window_handles
        button.click()
        # The result list opens in a new tab; move the driver there.
        self._switch_to_new_window(known_handles)

    def _switch_to_new_window(self, known_handles):
        """Wait for a window not in ``known_handles`` and switch to it.

        Returns:
            The handle of the new window, or ``None`` if none could be identified.

        Raises:
            TimeoutException: No additional window appeared within the wait.
        """
        self.wait.until(EC.new_window_is_opened(known_handles))
        for handle in self.browser.window_handles:
            if handle not in known_handles:
                self.browser.switch_to.window(handle)
                return handle
        return None

    def _close_extra_windows(self):
        """Close every window except the first one and switch back to it."""
        handles = self.browser.window_handles
        for handle in handles[1:]:
            self.browser.switch_to.window(handle)
            self.browser.close()
        if handles:
            self.browser.switch_to.window(handles[0])

    # Walk the result list.
    def second_step(self):
        """Open each ``J_section_<n>`` entry of the result list in turn.

        Must be called while the driver is on the result-list tab. The loop
        ends when the next numbered entry does not appear within the wait.
        Entries without the expected link or title, or whose click opens no
        new tab, are skipped.

        Raises:
            TimeoutException: The list container never appeared.
        """
        box = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.feedBox div .sections')))
        list_handle = self.browser.current_window_handle
        i = 0
        while True:
            # Scroll to the bottom so that further entries get loaded.
            self.browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
            section_id = 'J_section_' + str(i)
            try:
                # Poll instead of a single lookup: entries may still be loading.
                img_item = self.wait.until(lambda _driver: box.find_element(By.ID, section_id))
            except TimeoutException:
                break
            i += 1

            # Presence of an ".lbox" node selects the extraction branch in third_step.
            flag = bool(img_item.find_elements(By.CSS_SELECTOR, 'div div .lbox'))
            try:
                link = img_item.find_element(By.CSS_SELECTOR, 'div div .normal div div a')
                title = link.find_element(By.CSS_SELECTOR, 'span').text
            except NoSuchElementException:
                # Entry has a different layout; skip it.
                continue

            known_handles = self.browser.window_handles
            link.click()  # Open the selected detail page.
            try:
                self._switch_to_new_window(known_handles)
            except TimeoutException:
                # The click did not open a detail tab; move on to the next entry.
                continue
            try:
                self.third_step(flag, title)
            finally:
                # Always close the detail tab and return to the list, even on errors.
                self.browser.close()
                self.browser.switch_to.window(list_handle)

    # Collect the images of a detail page.
    def third_step(self, flag, title):
        """Print and download every image found on the current detail page.

        Args:
            flag: When true, ``src`` of each ``<img>`` inside ``<article>`` is
                used; otherwise the ``href`` of the link inside each
                ``.image-item`` of ``.image-list``.
            title: Entry title; printed and used as the download folder name.
        """
        print(title)
        try:
            if flag:
                article = self.wait.until(EC.presence_of_element_located((By.TAG_NAME, 'article')))
                for img in article.find_elements(By.TAG_NAME, 'img'):
                    url = img.get_attribute('src')
                    print(url)
                    self.save_img(url, title)
            else:
                img_box = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'image-list')))
                for item in img_box.find_elements(By.CLASS_NAME, 'image-item'):
                    url = item.find_element(By.CSS_SELECTOR, 'div a').get_attribute('href')
                    print(url)
                    self.save_img(url, title)
        except TimeoutException:
            # The page may be a video without the expected image container.
            print('NO IMAGE')
        print("\n")

    @staticmethod
    def _safe_dir_name(name):
        """Return ``name`` made usable as a folder name.

        Forbidden and control characters become ``_``; surrounding whitespace
        and trailing dots are removed; an empty result becomes ``'untitled'``.
        """
        cleaned = ''.join(
            '_' if ch in TouTiaoImg._FORBIDDEN_NAME_CHARS or ord(ch) < 32 else ch
            for ch in name
        )
        cleaned = cleaned.strip().rstrip('. ')
        return cleaned or 'untitled'

    # Save an image to the local disk.
    def save_img(self, url, dir):
        """Download ``url`` into ``<save_root>/<dir>/<md5 of content>.jpg``.

        Missing URLs and failed downloads are skipped (the latter with a
        printed message). A file whose name already exists is not rewritten.

        Args:
            url: Image address; ``None`` or empty is ignored.
            dir: Folder name, normally the entry title (the parameter name
                shadows the builtin but is kept for caller compatibility).
        """
        if not url:
            return
        try:
            response = requests.get(url, timeout=self.download_timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print('download failed: {} ({})'.format(url, exc))
            return
        dir_path = os.path.join(self.save_root, self._safe_dir_name(dir))
        # makedirs also creates a missing save_root and tolerates an existing folder.
        os.makedirs(dir_path, exist_ok=True)
        save_name = os.path.join(dir_path, '{}.jpg'.format(md5(response.content).hexdigest()))
        if not os.path.exists(save_name):
            with open(save_name, 'wb') as f:
                f.write(response.content)
        
        
 
 
if __name__ == '__main__':
    # Search keyword; change it to whatever you want to scrape.
    scraper = TouTiaoImg()
    scraper.first_step('cosplay')